Tidy data

set-up

Read data

# reads excel data sheet

data <- read.csv("../data/study4.csv") # reads data
data_sat <- read.csv("../data/raw/stage3_satisfaction.csv") # reads in satisfaction data



# summary(data) # explore the data types, missing data and typos

data <- data %>%
  mutate(gender = ifelse(gender == "m", "m", "f")) # corrects an issuewith extra space in "f "

data$gender <- as.factor(data$gender)
data$pp_number <- as.factor(data$pp_number)
data$condition <-as.factor(data$condition)


# mood data
mood1 <- read.csv("../data/raw/stage1_mood.csv") %>%
  select(subject, stimulusitem2, response)%>%
  rename(response1=response, feeling = stimulusitem2)%>%
  mutate(stage = 1)

mood2 <- read.csv("../data/raw/stage2_mood.csv") %>%
  select(subject, stimulusitem2,  response)%>%
  rename(response2=response,  feeling = stimulusitem2)%>%
  mutate(stage = 2)

mood3 <- read.csv("../data/raw/stage3_mood.csv") %>%
  select(subject, stimulusitem2, response) %>%
  rename(response3=response,  feeling = stimulusitem2)%>%
  mutate(stage = 3)


# qualtrics data 
      #mainly info about drinking

data_q <- read.csv("../data/other/study_4Q.csv")%>%
  select(Q21, gender, age, Q15, Q16)%>%
  rename(subject = Q21, 
         units = Q15, 
         units_beer = Q16)%>%
  mutate(subject = as.numeric(subject),
         condition = subject %/% 1000)%>%
  filter(!is.na(subject))%>%
  mutate(subject = as.factor(subject), 
         gender=as.factor(gender), 
         age = as.numeric(age), 
         units = as.numeric(units), 
         units_beer = as.numeric(units_beer))%>%
  filter( subject != "6666")

data joins

DA <- data %>%
  select(pp_number, BMI)%>%
  rename(subject = pp_number)

DB <- data_sat %>%
  select(subject, response, trialcode ) %>%
  mutate(row = row_number(), subject = as.factor(subject)) %>%
  pivot_wider(names_from = trialcode, values_from = response) %>%
  rename( wtp = VAS_wtp)%>%
  select(- row)


D1 <- inner_join( DA,DB,
                  by = "subject",
                  keep = FALSE
                ) %>%
    pivot_longer(cols =c(VAS, wtp) , 
                 names_to = "measure", 
                 values_to = "rating") %>%
    inner_join(., data_q,
             by = "subject", 
             keep = FALSE)%>%
  filter(!is.na(rating))
                                            # long dataset incl,. info about satisfaction and willingness to pay by condition, and alcohol/ non-alcohol
# need to add focus condition


#Joining mood data

D2 <- inner_join(mood1, mood2, 
                 by = c("subject", "feeling"),
                 keep = FALSE) %>%
  inner_join(., mood3,
             by = c("subject","feeling"),
             keep = FALSE)%>%
            select(- starts_with("stage")) %>%
  mutate(condition = subject  %/% 1000) %>%
  pivot_longer(cols = starts_with("response"),
   names_to = "stage",
   names_prefix = "response",
   values_to = "rating")

describing data

BMI, gender & age

# gender
total_n <- nrow(data)
males <- data_q%>%filter(gender == "male") %>% nrow()
females <- data_q%>%filter(gender == "female") %>% nrow()
nonbinary <- data_q %>% filter(gender== "non-binary" )%>% nrow()

data_q %>%
  group_by(gender)%>%
  summarise(n=n())
## # A tibble: 3 x 2
##   gender         n
##   <fct>      <int>
## 1 female       145
## 2 male          41
## 3 non-binary     5
mean_BMI <- mean(data$BMI, na.rm = TRUE) %>%
  round(.,1)
sd_BMI <- sd(data$BMI, na.rm = TRUE) %>%
  round(., 1)

# by condition
gender_tib <- D1 %>% 
  filter(measure == "wtp") %>% # avoids duplicating data
  group_by( condition) %>%
  summarise(n=n(),
            BMI = mean(BMI, na.rm = TRUE), 
            age = mean(age, na.rm = TRUE)) %>% 
  kable()

gender_tib
condition n BMI age
1 30 22.41307 20.55172
2 30 23.29661 20.34483
3 29 22.28333 19.41379
4 32 23.05429 21.62500
5 38 24.10018 19.97222
6 30 23.09282 19.57143
## by condition
conditions_tib <- data %>% 
  select(pp_number, condition, gender, BMI) %>%
  group_by( condition) %>%
  summarise(n=n(), BMI = mean(BMI, na.rm = TRUE)) %>% 
  kable()

## satisfaction
satis_tib <- D1 %>%
  group_by(condition, measure) %>%
  summarise(response = mean(rating, na.rm = TRUE))

mood

mood_cond_tib <- D2 %>% group_by(feeling, stage) %>%
  summarise(m_rating = mean(rating, na.rm = TRUE), sd_f = sd(rating))%>%  # Using dplyr functions
  mutate_if(is.numeric,
            round,
            digits = 1)%>%
  rename(mean = m_rating, sd = sd_f)%>%
  kable() %>%
  kable_styling(full_width = TRUE, 
                "striped")

There were total of 189 participants (41 males, 145 females, 5 non-binary ). The BMI of pp ranged between NA and NA, with the average BMI M = 22.9 (sd = 3.4).

Initial analyses

differences between conditions

gender

# plot
## barplot is best here

gender_cond_plot <-
  ggplot(data = data_q)+
  geom_bar(aes(x= condition, fill = gender))+
  scale_fill_manual(values = c("#d62828", "#003049", "grey65")) +
  scale_y_continuous(name="", limits=c(0, 40), breaks = seq(0,40, 5)) +
  scale_x_discrete(name = "conditions", breaks = seq(1,6,1), limits = c("1", "2", "3", "4", "5", "6"))+
  theme(panel.grid = element_blank())+
  theme_minimal()

 ggplotly(gender_cond_plot)  

Number of partivipants in each conditions

# logistic regression 

xtabs(~ gender + condition , data = data) %>%
  kable() %>%
  kable_styling()
1 2 3 4 5 6
f 26 22 23 25 30 23
m 5 8 7 6 6 8
log_model <-
  glm(condition ~ gender, data = data, family = binomial(), na.action = na.fail)

broom::glance(log_model)
## # A tibble: 1 x 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          169.     188  -84.0  172.  179.     168.         187   189
tidy_log <- broom::tidy(log_model)


cond_int <- glm(condition ~ 1, data = data, family = binomial())
cond_gend <- glm(condition ~ gender, data = data, family = binomial())
anova(cond_int, cond_gend, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: condition ~ 1
## Model 2: condition ~ gender
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       188     168.69                     
## 2       187     168.10  1  0.59389   0.4409

Gender is not associated with condition, ie. the gender split is roughly equal accross the conditions p = 0, 0.455.

Age

data_desc <- D1 %>%
  filter(measure == "wtp")

ggplot(aes(x=condition, y = age, color = gender), data= data_desc)+
  geom_jitter( position=position_jitter(0.2), cex=1.5 )+
  stat_summary(fun.y=mean, geom="point", shape=18,
                 size=3, color="#5D5F71")+
  stat_summary(fun.data=mean_cl_boot, 
                 geom="pointrange", color="#5D5F71")+
  scale_y_continuous(name="age", limits=c(18, 60), breaks = seq(0,60, 10)) +
  scale_x_discrete(name = "conditions", limits = c("1", "2", "3", "4", "5", "6"))+
  scale_color_manual(values = c("#d62828", "#003049", "grey65")) +
  theme_minimal()
Participants' age by condition and gedner

Participants’ age by condition and gedner

BMI

ggplot(aes(x=condition, y = BMI, color = gender), data= data_desc)+
  geom_jitter( position=position_jitter(0.2), cex=1.5 )+
  stat_summary(fun.y=mean, geom="point", shape=18,
                 size=3, color="#5D5F71")+
  stat_summary(fun.data=mean_cl_boot,  
                 geom="pointrange", color="#5D5F71")+
  scale_y_continuous(name="BMI", limits=c(18, 35), breaks = seq(0,35, 10)) +
  scale_x_discrete(name = "conditions", limits = c("1", "2", "3", "4", "5", "6"))+
  scale_color_manual(values = c("#d62828", "#003049", "grey65")) +
  theme_minimal()
 Participants' BMI by condition and gender

Participants’ BMI by condition and gender

baseline mood

mood_base <- D2 %>%
  filter(stage == 1)

palette <- c("#033f63","#28666e","#7c9885","#b5b682","#fedc97","#e5e1ee","#c17767","#210203","#eb9486","#82204a", "#0a2342","#2ca58d","#84bc9c","#fffdf7","#f46197","#fec601")

ggplot(data = mood_base, aes(x=condition, y = rating, group = condition, fill = feeling))+
  geom_boxplot()+
  facet_wrap("feeling")+
  scale_fill_manual(values = palette) +
  scale_y_continuous(name="", limits=c(0, 100), breaks = seq(0,100, 20)) +
  theme(panel.grid = element_blank())+
  theme_minimal()

drinking